import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib

# Global font configuration for every figure drawn by this script.
# NOTE(review): presumably chosen so the Chinese labels used in the plot
# render correctly — confirm the font is installed on the target machine.
font_options = {"family": 'Source Han Sans CN', "weight": "regular", "size": "10"}
matplotlib.rc("font", **font_options)

# Load the movie data set from a path relative to the working directory.
file_path = "csv/IMDB-Movie-Data.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Count how many movies belong to each genre.  Every "Genre" cell is a
# comma-separated string; it is expanded into a 0/1 indicator table with one
# column per genre, and the column sums give the per-genre movie counts.

# Per-row genre lists, kept under this name for any later code that uses it.
temp_list = df["Genre"].str.split(",").tolist()  # [[...], [...], ...]

# str.get_dummies builds the whole indicator table in one call instead of a
# Python-level loop of .loc assignments.  Its columns come back sorted
# (deterministic, unlike iterating a set), a missing Genre yields an all-zero
# row instead of raising, and empty tokens are dropped.  The cast keeps the
# table (and therefore the counts) floating-point.
zeros_df = df["Genre"].str.get_dummies(sep=",").astype(float)
genre_list = zeros_df.columns.tolist()

# Ascending counts; the stable sort keeps genres with equal counts in
# alphabetical order so the chart looks the same on every run.
genre_count = zeros_df.sum(axis=0).sort_values(kind="mergesort")

# Draw the per-genre movie counts as a horizontal bar chart.
plt.figure(figsize=(15, 10))

# Axis captions and title (x: "count", y: "genre",
# title: "bar chart of genres for 1000 movies").
plt.xlabel("数量")
plt.ylabel("分类")
plt.title("1000部电影分类条形图", size="16", weight="bold")

# One bar slot per genre; label each slot with the genre name.
bar_positions = range(len(genre_count))
plt.yticks(bar_positions, genre_count.index)

# Bar length is the movie count; 0.5 is the thickness of each bar.
plt.barh(bar_positions, genre_count.values, height=0.5)
plt.show()
